{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1c72db02",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['HF_HOME'] = \"/workspace/cache\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4fb1178f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers.trainer_utils import get_last_checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1cb98076",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/workspace/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack/checkpoint-3260'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "latest = get_last_checkpoint(\"/workspace/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack\")\n",
    "latest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "84ac088c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer\n",
    "from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, PeftModel\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "24d278b7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e4f8e41ee81b47c5a7b89cf5fd9e4f61",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "Qwen2ForCausalLM(\n",
       "  (model): Qwen2Model(\n",
       "    (embed_tokens): Embedding(152064, 5120)\n",
       "    (layers): ModuleList(\n",
       "      (0-47): 48 x Qwen2DecoderLayer(\n",
       "        (self_attn): Qwen2SdpaAttention(\n",
       "          (q_proj): Linear(in_features=5120, out_features=5120, bias=True)\n",
       "          (k_proj): Linear(in_features=5120, out_features=1024, bias=True)\n",
       "          (v_proj): Linear(in_features=5120, out_features=1024, bias=True)\n",
       "          (o_proj): Linear(in_features=5120, out_features=5120, bias=False)\n",
       "          (rotary_emb): Qwen2RotaryEmbedding()\n",
       "        )\n",
       "        (mlp): Qwen2MLP(\n",
       "          (gate_proj): Linear(in_features=5120, out_features=13824, bias=False)\n",
       "          (up_proj): Linear(in_features=5120, out_features=13824, bias=False)\n",
       "          (down_proj): Linear(in_features=13824, out_features=5120, bias=False)\n",
       "          (act_fn): SiLU()\n",
       "        )\n",
       "        (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "        (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "      )\n",
       "    )\n",
       "    (norm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "    (rotary_emb): Qwen2RotaryEmbedding()\n",
       "  )\n",
       "  (lm_head): Linear(in_features=5120, out_features=152064, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-14B-Instruct', device_map = 'auto',\n",
    "                                             torch_dtype = torch.bfloat16)\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8f843326",
   "metadata": {},
   "outputs": [],
   "source": [
    "m = PeftModel.from_pretrained(model, latest, device_map = 'auto', torch_dtype = torch.bfloat16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a1a2377f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/peft/utils/save_and_load.py:230: UserWarning: Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.\n",
      "  warnings.warn(\"Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`.\")\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d9ea02dfbf914bf9bc245f9185bcd5a9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.safetensors:   0%|          | 0.00/5.48G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack/commit/753f5d3ae5db35256f00824956249b3b0657510e', commit_message='Upload model', commit_description='', oid='753f5d3ae5db35256f00824956249b3b0657510e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mesolitica/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack', endpoint='https://huggingface.co', repo_type='model', repo_id='mesolitica/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack'), pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m.push_to_hub('mesolitica/Qwen-Qwen2.5-14B-Instruct-lora-128-embedding-8k-multipack', private = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c19e398c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Qwen2ForCausalLM(\n",
       "  (model): Qwen2Model(\n",
       "    (embed_tokens): Embedding(152064, 5120)\n",
       "    (layers): ModuleList(\n",
       "      (0-47): 48 x Qwen2DecoderLayer(\n",
       "        (self_attn): Qwen2SdpaAttention(\n",
       "          (q_proj): Linear(in_features=5120, out_features=5120, bias=True)\n",
       "          (k_proj): Linear(in_features=5120, out_features=1024, bias=True)\n",
       "          (v_proj): Linear(in_features=5120, out_features=1024, bias=True)\n",
       "          (o_proj): Linear(in_features=5120, out_features=5120, bias=False)\n",
       "          (rotary_emb): Qwen2RotaryEmbedding()\n",
       "        )\n",
       "        (mlp): Qwen2MLP(\n",
       "          (gate_proj): Linear(in_features=5120, out_features=13824, bias=False)\n",
       "          (up_proj): Linear(in_features=5120, out_features=13824, bias=False)\n",
       "          (down_proj): Linear(in_features=13824, out_features=5120, bias=False)\n",
       "          (act_fn): SiLU()\n",
       "        )\n",
       "        (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "        (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "      )\n",
       "    )\n",
       "    (norm): Qwen2RMSNorm((5120,), eps=1e-06)\n",
       "    (rotary_emb): Qwen2RotaryEmbedding()\n",
       "  )\n",
       "  (lm_head): Linear(in_features=5120, out_features=152064, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m = m.merge_and_unload()\n",
    "m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "7ff4b3a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(latest)\n",
    "tokenizer.chat_template = tokenizer.chat_template.replace('You are Qwen, created by Alibaba Cloud', 'You are MaLLaM, created by Mesolitica')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "88785362",
   "metadata": {},
   "outputs": [],
   "source": [
    "streamer = TextStreamer(tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f6624489",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:628: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:633: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n",
      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are MaLLaM, created by Mesolitica. You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "camne nk selesaikan masalah hutang negara<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Untuk menyelesaikan masalah hutang negara, beberapa langkah yang boleh diambil ialah:\n",
      "\n",
      "1. Mengurangkan perbelanjaan kerajaan - Kerajaan perlu mengurangkan perbelanjaan yang tidak perlu dan meningkatkan kecekapan dalam penggunaan dana awam.\n",
      "\n",
      "2. Meningkatkan pendapatan kerajaan - Kerajaan perlu meningkatkan pendapatan melalui pelbagai cara seperti meningkatkan cukai, memperkenalkan cukai baru, atau meningkatkan hasil daripada\n"
     ]
    }
   ],
   "source": [
    "d = [\n",
    "    {'role': 'user', 'content': 'camne nk selesaikan masalah hutang negara'}\n",
    "]\n",
    "\n",
    "with torch.no_grad():\n",
    "    inputs = tokenizer.apply_chat_template(d, return_tensors = 'pt').to('cuda')\n",
    "    generate_kwargs = dict(\n",
    "        input_ids=inputs,\n",
    "        max_new_tokens=128,\n",
    "        top_p=0.95,\n",
    "        top_k=50,\n",
    "        temperature=0.0,\n",
    "        do_sample=False,\n",
    "        repetition_penalty=1.05,\n",
    "        streamer=streamer\n",
    "    )\n",
    "    generation_output = m.generate(**generate_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5931ee57",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are MaLLaM, created by Mesolitica. You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "camne nk selesaikan masalah hutang negara, reply in manglish<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Wah lau eh, settle Malaysia's gahmen debt not easy lah, need do many things and take long time. Here got some ideas to help:\n",
      "\n",
      "1. Make economy grow faster: If economy go up up, then gahmen can make more money from taxes and other things. Must do things like make business easier, invest in education and training, and build more stuff.\n",
      "\n",
      "2. Cut down on spending: Gahmen must be careful with money lah. Cannot anyhow spend. Must see which things really important, then cut the rest. Also must make sure all the money spent properly, no corruption or\n"
     ]
    }
   ],
   "source": [
    "d = [\n",
    "    {'role': 'user', 'content': 'camne nk selesaikan masalah hutang negara, reply in manglish'}\n",
    "]\n",
    "\n",
    "with torch.no_grad():\n",
    "    inputs = tokenizer.apply_chat_template(d, return_tensors = 'pt').to('cuda')\n",
    "    generate_kwargs = dict(\n",
    "        input_ids=inputs,\n",
    "        max_new_tokens=128,\n",
    "        top_p=0.95,\n",
    "        top_k=50,\n",
    "        temperature=0.0,\n",
    "        do_sample=False,\n",
    "        repetition_penalty=1.05,\n",
    "        streamer=streamer\n",
    "    )\n",
    "    generation_output = m.generate(**generate_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a014b8de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are MaLLaM, created by Mesolitica. You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "camne nk selesaikan masalah hutang negara, reply in jawi<|im_end|>\n",
      "<|im_start|>assistant\n",
      "مڠورڠكن هوتڠ نڬارا مليسيا اداله توڬس يڠ كومڤليك س يڠ ممرلوكن ڤندكتن ڤلباڬاي روڤا. برايكوت اياله ببراڤ لڠكه يڠ بوليه دأمبيل اونتوق مناڠني ايسو ايت:\n",
      "\n",
      "١. مڠورڠكن ڤربلنجأن كراجأن: كراجأن بوليه مڠورڠكن ڤربلنجأنڽ دڠن م\n"
     ]
    }
   ],
   "source": [
    "d = [\n",
    "    {'role': 'user', 'content': 'camne nk selesaikan masalah hutang negara, reply in jawi'}\n",
    "]\n",
    "\n",
    "with torch.no_grad():\n",
    "    inputs = tokenizer.apply_chat_template(d, return_tensors = 'pt').to('cuda')\n",
    "    generate_kwargs = dict(\n",
    "        input_ids=inputs,\n",
    "        max_new_tokens=128,\n",
    "        top_p=0.95,\n",
    "        top_k=50,\n",
    "        temperature=0.0,\n",
    "        do_sample=False,\n",
    "        repetition_penalty=1.05,\n",
    "        streamer=streamer\n",
    "    )\n",
    "    generation_output = m.generate(**generate_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "671ca267",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are MaLLaM, created by Mesolitica. You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "camne nk selesaikan masalah hutang negara, reply in kedah<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Nak selesai masalah hutang negara ni, kena buat macam-macam la. Ni aku nak bagitau hang apa yang boleh buat:\n",
      "\n",
      "1. Kena tambah duit masuk: Kerajaan kena cari cara nak bagi lebih banyak duit masuk negara. Boleh buat macam-macam - tarik orang luar datang labur, buat bisnes baru, kurangkan rasuah, pastu buat kerja elok-elok.\n",
      "\n",
      "2. Kurangkan duit keluar: Kerajaan pun kena j\n"
     ]
    }
   ],
   "source": [
    "d = [\n",
    "    {'role': 'user', 'content': 'camne nk selesaikan masalah hutang negara, reply in kedah'}\n",
    "]\n",
    "\n",
    "with torch.no_grad():\n",
    "    inputs = tokenizer.apply_chat_template(d, return_tensors = 'pt').to('cuda')\n",
    "    generate_kwargs = dict(\n",
    "        input_ids=inputs,\n",
    "        max_new_tokens=128,\n",
    "        top_p=0.95,\n",
    "        top_k=50,\n",
    "        temperature=0.0,\n",
    "        do_sample=False,\n",
    "        repetition_penalty=1.05,\n",
    "        streamer=streamer\n",
    "    )\n",
    "    generation_output = m.generate(**generate_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "2fef6502",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-12-17 10:16:40,073] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/bin/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n",
      "/usr/bin/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "86d2f1fbcfb54d549842bebed91acfc9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "79287064131d4a58a542e53951d25ae8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00004-of-00006.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "826dc915d46b4fdb8cabf9e7514e26c8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00006-of-00006.safetensors:   0%|          | 0.00/4.73G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1e082d57744a46a1936597a0e832cdea",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00003-of-00006.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e259a76c92c44c09de51eeb567d4df3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ce9051b7c85f4a13865964834420aa4d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00006.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c5562cdafb104301b43ec51cd8bc93d9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00005-of-00006.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-Qwen-Qwen2.5-14B-Instruct/commit/6e68be32d4bbb7053f8ce31cba9c030b4cb4fb01', commit_message='Upload Qwen2ForCausalLM', commit_description='', oid='6e68be32d4bbb7053f8ce31cba9c030b4cb4fb01', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mesolitica/malaysian-Qwen-Qwen2.5-14B-Instruct', endpoint='https://huggingface.co', repo_type='model', repo_id='mesolitica/malaysian-Qwen-Qwen2.5-14B-Instruct'), pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m.push_to_hub('mesolitica/malaysian-Qwen-Qwen2.5-14B-Instruct', private = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cae5321e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
